package Job

import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

/**
 * Spark job that prints the five records with the largest integer value in
 * column 1, restricted to rows whose column 7 equals "女" (female).
 *
 * Input is read as comma-separated text from `file/JobData/input`. Each
 * printed record is the tuple `(column 2, column 0, column 1 as Int)`.
 *
 * Rows that have fewer than eight fields, or whose column 1 is not a valid
 * integer, are skipped instead of aborting the whole job.
 */
object FemaleMostTop {
  import scala.util.Try

  /** Number of records printed by the job. */
  private val TopN = 5

  /** Fields a row must have; index 7 is the highest column that is read. */
  private val MinFields = 8

  /**
   * Parses one CSV line into `(column 2, column 0, column 1 as Int)`.
   *
   * @param line raw comma-separated input line
   * @return the parsed record, or `None` when the row is too short, is not
   *         marked "女" in column 7, or has a non-integer column 1
   */
  private def parseFemaleRecord(line: String): Option[(String, String, Int)] = {
    // A negative limit keeps trailing empty fields, so a row whose last
    // columns are blank still exposes all of its positions.
    val fields = line.split(",", -1)
    if (fields.length < MinFields || fields(7) != "女") None
    else Try(fields(1).toInt).toOption.map(value => (fields(2), fields(0), value))
  }

  /**
   * Entry point: runs the job on a local Spark master and prints the result
   * to standard output, one record per line, largest value first.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sc: SparkContext = new SparkContext("local[*]", "FemaleMostTop")
    try {
      val inputPath: String = "file/JobData/input"
      val lines: RDD[String] = sc.textFile(inputPath)

      // `top` keeps only the TopN largest elements per partition and merges
      // them on the driver, avoiding a full shuffle-sort of the data set.
      // The result is already in descending order of the Int column.
      val res = lines
        .flatMap(line => parseFemaleRecord(line))
        .top(TopN)(Ordering.by[(String, String, Int), Int](_._3))

      res.foreach(println)
    } finally {
      // Always release the context, even when the job fails part-way.
      sc.stop()
    }
  }
}
